{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "67c1e6b1",
   "metadata": {},
   "source": [
    "\n",
    "\n",
    "下面的例子将展示词向量标准工具包——gensim提供的词嵌入，并展示词嵌入如何表示词的相似度。\n",
    "<!-- https://nlp.stanford.edu/projects/glove/ -->"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "5c5a740a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pprint\n",
    "\n",
    "from gensim.models import KeyedVectors\n",
    "\n",
    "# 从GloVe官网下载GloVe向量，此处使用的是glove.6B.zip\n",
    "# 解压缩zip文件并将以下路径改为解压后对应文件的路径\n",
    "model = KeyedVectors.load_word2vec_format('glove.6B.100d.txt', binary=False, no_header=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "01a2e4a5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('movie', 0.9055122137069702),\n",
      " ('films', 0.8914434909820557),\n",
      " ('directed', 0.8124362230300903),\n",
      " ('documentary', 0.8075793981552124),\n",
      " ('drama', 0.7929168939590454),\n",
      " ('movies', 0.7889865040779114),\n",
      " ('comedy', 0.7842751145362854),\n",
      " ('starring', 0.7573285698890686),\n",
      " ('cinema', 0.7419456839561462),\n",
      " ('hollywood', 0.7307389378547668)]\n",
      "[('vehicle', 0.8630837798118591),\n",
      " ('truck', 0.8597877025604248),\n",
      " ('cars', 0.837166965007782),\n",
      " ('driver', 0.8185910582542419),\n",
      " ('driving', 0.781263530254364),\n",
      " ('motorcycle', 0.7553157210350037),\n",
      " ('vehicles', 0.7462257146835327),\n",
      " ('parked', 0.74594646692276),\n",
      " ('bus', 0.737270712852478),\n",
      " ('taxi', 0.7155268788337708)]\n"
     ]
    }
   ],
   "source": [
    "# 使用most_similar()找到词表中距离给定词最近（最相似）的n个词\n",
    "pprint.pprint(model.most_similar('film'))\n",
    "pprint.pprint(model.most_similar('car'))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "8b62f7ad",
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "japanese\n",
      "panda\n",
      "longest\n",
      "terrible\n",
      "queen\n"
     ]
    }
   ],
   "source": [
    "# 利用GloVe展示一个类比的例子\n",
    "def analogy(x1, x2, y1):\n",
    "    # 寻找top-N最相似的词。\n",
    "    result = model.most_similar(positive=[y1, x2], negative=[x1])\n",
    "    return result[0][0]\n",
    "\n",
    "print(analogy('china', 'chinese', 'japan'))\n",
    "print(analogy('australia', 'koala', 'china'))\n",
    "print(analogy('tall', 'tallest', 'long'))\n",
    "print(analogy('good', 'fantastic', 'bad'))\n",
    "print(analogy('man', 'woman', 'king'))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0c308cee",
   "metadata": {},
   "source": [
    "下面将展示word2vec的代码，包括文本预处理、skipgram算法的实现、以及使用PyTorch进行优化。这里使用《小王子》这本书作为训练语料。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "590fc408",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 安装NLTK，使用如下代码下载punkt组件\n",
    "#import nltk\n",
    "#nltk.download('punkt')\n",
    "\n",
    "from nltk.tokenize import sent_tokenize, word_tokenize\n",
    "from collections import defaultdict\n",
    "\n",
    "# 使用类管理数据对象，包括文本读取、文本预处理等\n",
    "class TheLittlePrinceDataset:\n",
    "    def __init__(self, tokenize=True):\n",
    "        # 利用NLTK函数进行分句和分词\n",
    "        text = open('the little prince.txt', 'r', encoding='utf-8').read()\n",
    "        if tokenize:\n",
    "            self.sentences = sent_tokenize(text.lower())\n",
    "            self.tokens = [word_tokenize(sent) for sent in self.sentences]\n",
    "        else:\n",
    "            self.text = text\n",
    "\n",
    "    def build_vocab(self, min_freq=1):\n",
    "        # 统计词频\n",
    "        frequency = defaultdict(int)\n",
    "        for sentence in self.tokens:\n",
    "            for token in sentence:\n",
    "                frequency[token] += 1\n",
    "        self.frequency = frequency\n",
    "\n",
    "        # 加入<unk>处理未登录词，加入<pad>用于对齐变长输入进而加速\n",
    "        self.token2id = {'<unk>': 1, '<pad>': 0}\n",
    "        self.id2token = {1: '<unk>', 0: '<pad>'}\n",
    "        for token, freq in sorted(frequency.items(), key=lambda x: -x[1]):\n",
    "            # 丢弃低频词\n",
    "            if freq > min_freq:\n",
    "                self.token2id[token] = len(self.token2id)\n",
    "                self.id2token[len(self.id2token)] = token\n",
    "            else:\n",
    "                break\n",
    "\n",
    "    def get_word_distribution(self):\n",
    "        distribution = np.zeros(vocab_size)\n",
    "        for token, freq in self.frequency.items():\n",
    "            if token in dataset.token2id:\n",
    "                distribution[dataset.token2id[token]] = freq\n",
    "            else:\n",
    "                # 不在词表中的词按<unk>计算\n",
    "                distribution[1] += freq\n",
    "        distribution /= distribution.sum()\n",
    "        return distribution\n",
    "\n",
    "    # 将分词结果转化为索引表示\n",
    "    def convert_tokens_to_ids(self, drop_single_word=True):\n",
    "        self.token_ids = []\n",
    "        for sentence in self.tokens:\n",
    "            token_ids = [self.token2id.get(token, 1) for token in sentence]\n",
    "            # 忽略只有一个token的序列，无法计算loss\n",
    "            if len(token_ids) == 1 and drop_single_word:\n",
    "                continue\n",
    "            self.token_ids.append(token_ids)\n",
    "        \n",
    "        return self.token_ids\n",
    "\n",
    "dataset = TheLittlePrinceDataset()\n",
    "dataset.build_vocab(min_freq=1)\n",
    "sentences = dataset.convert_tokens_to_ids()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "efc882de",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(76044, 2) [[  4  16]\n",
      " [  4  19]\n",
      " [ 16   4]\n",
      " ...\n",
      " [130   3]\n",
      " [  3  86]\n",
      " [  3 130]]\n"
     ]
    }
   ],
   "source": [
    "# 遍历所有的中心词-上下文词对\n",
    "window_size = 2\n",
    "data = []\n",
    "\n",
    "for sentence in sentences:\n",
    "    for i in range(len(sentence)):\n",
    "        for j in range(i-window_size, i+window_size+1):\n",
    "            if j == i or j < 0 or j >= len(sentence):\n",
    "                continue\n",
    "            center_word = sentence[i]\n",
    "            context_word = sentence[j]\n",
    "            data.append([center_word, context_word])\n",
    "\n",
    "# 需要提前安装numpy\n",
    "import numpy as np\n",
    "data = np.array(data)\n",
    "print(data.shape, data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "30903b3d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 需要提前安装PyTorch\n",
    "import torch\n",
    "from torch import nn\n",
    "import torch.nn.functional as F\n",
    "\n",
    "# 实现skipgram算法，使用对比学习计算损失\n",
    "class SkipGramNCE(nn.Module):\n",
    "    def __init__(self, vocab_size, embed_size, distribution,\\\n",
    "                 neg_samples=20):\n",
    "        super(SkipGramNCE, self).__init__()\n",
    "        print(f'vocab_size = {vocab_size}, embed_size = {embed_size}, '+\\\n",
    "              f'neg_samples = {neg_samples}')\n",
    "        self.input_embeddings = nn.Embedding(vocab_size, embed_size)\n",
    "        self.output_embeddings = nn.Embedding(vocab_size, embed_size)\n",
    "        distribution = np.power(distribution, 0.75)\n",
    "        distribution /= distribution.sum()\n",
    "        self.distribution = torch.tensor(distribution)\n",
    "        self.neg_samples = neg_samples\n",
    "        \n",
    "    def forward(self, input_ids, labels):\n",
    "        i_embed = self.input_embeddings(input_ids)\n",
    "        o_embed = self.output_embeddings(labels)\n",
    "        batch_size = i_embed.size(0)\n",
    "        n_words = torch.multinomial(self.distribution, batch_size * \\\n",
    "            self.neg_samples, replacement=True).view(batch_size, -1)\n",
    "        n_embed = self.output_embeddings(n_words)\n",
    "        pos_term = F.logsigmoid(torch.sum(i_embed * o_embed, dim=1))\n",
    "        # 负采样，用于对比学习\n",
    "        neg_term = F.logsigmoid(- torch.bmm(n_embed, \\\n",
    "            i_embed.unsqueeze(2)).squeeze())\n",
    "        neg_term = torch.sum(neg_term, dim=1)\n",
    "        loss = - torch.mean(pos_term + neg_term)\n",
    "        return loss\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "1d9da6c8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.00000000e+00 5.43983724e-02 5.34295679e-02 ... 9.68804495e-05\n",
      " 9.68804495e-05 9.68804495e-05]\n",
      "vocab_size = 1078, embed_size = 128, neg_samples = 20\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "epoch-99, loss=3.0015: 100%|█| 100/100 [10:17<00:00,  6.17s/\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjMAAAGwCAYAAABcnuQpAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABJTklEQVR4nO3dfVxUdd4//teZWwYYBgSZAUFAJe/vNRMt6UZKzW68utVKr65tMyuztmxd2yvadqHczaws92fbZbatq1/LXLc2FU3RMhVR0/BeUVBBRHGG2xlm5vP7A+foiJoyN4eb1/PxmEdyzpnDm5M6Lz+f9/kcSQghQERERNRCqZQugIiIiMgXDDNERETUojHMEBERUYvGMENEREQtGsMMERERtWgMM0RERNSiMcwQERFRi6ZRuoBAc7vdOHnyJIxGIyRJUrocIiIiugZCCFRWViI+Ph4q1dXHXlp9mDl58iQSExOVLoOIiIiaoLi4GAkJCVc9ptWHGaPRCKDhYkRERChcDREREV0Lm82GxMRE+XP8alp9mPFMLUVERDDMEBERtTDX0iLCBmAiIiJq0RhmiIiIqEVjmCEiIqIWjWGGiIiIWjSGGSIiImrRGGaIiIioRWOYISIiohaNYYaIiIhaNIYZIiIiatEYZoiIiKhFY5ghIiKiFo1hhoiIiFq0Vv+gyUCpsjtxrsYBg1aN6HC90uUQERG1WRyZaaJPfyjE8LfX4S+r9ytdChERUZvGMNNEWnXDpbM73QpXQkRE1LYxzDSRJ8zUu4TClRAREbVtDDNNpNWcDzMcmSEiIlIUw0wT6dQSAKDexTBDRESkJIaZJtKdH5lxMMwQEREpimGmiS70zDDMEBERKYlhponYAExERNQ8MMw0kY4jM0RERM0Cw0wTeUZmHLybiYiISFEMM02k5d1MREREzQLDTBPJ68ywZ4aIiEhRDDNNpOM0ExERUbPAMNNEvDWbiIioeWCYaSJPzwwXzSMiIlIWw0wTcWSGiIioeVA0zCQnJ0OSpEavZ599FgAghEBmZibi4+NhMBiQnp6OgoICJUuW6dgATERE1CwoGmby8vJQUlIiv3JycgAADz74IABg1qxZmD17NubOnYu8vDxYLBaMHDkSlZWVSpYN4EIDsMst4HIz0BARESlF0TDTvn17WCwW+fX111+jc+fOGDFiBIQQmDNnDmbOnIlx48ahV69eWLhwIWpqarBo0aIrntNut8Nms3m9AsFzazbAqSYiIiIlNZueGYfDgc8//xxPPvkkJElCYWEhSktLkZGRIR+j1+sxYsQIbNq06Yrnyc7Ohslkkl+JiYkBqdfTAAwwzBARESmp2YSZ5cuX49y5c5g0aRIAoLS0FABgNpu9jjObzfK+y5kxYwasVqv8Ki4uDki9WtXFIzOcZiIiIlKKRukCPD755BOMGjUK8fHxXtslSfL6WgjRaNvF9Ho99Hp9QGq8mEolQaOS4HQLjswQEREpqFmMzBw7dgxr1qzBr371K3mbxWIBgEajMGVlZY1Ga5TCh00SEREpr1mEmQULFiA2NhZjxoyRt6WkpMBisch3OAENfTW5ublIS0tTosxGuHAeERGR8hSfZnK73ViwYAEmTpwIjeZCOZIkYdq0acjKykJqaipSU1ORlZWF0NBQjB8/XsGKL7iw1gzDDBERkVIUDzNr1qxBUVERnnzyyUb7pk+fjtraWkyZMgUVFRUYMmQIVq9eDaPRqECljcmrADvZAExERKQUSQjRqj+JbTYbTCYTrFYrIiIi/HruW2atQ9HZGnz5TBoGJkX59dxERERt2fV8fjeLnpmWytMzw2kmIiIi5TDM+ECnUQNgmCEiIlISw4wPdByZISIiUhzDjA8urDPTqtuOiIiImjWGGR/IdzNxZIaIiEgxDDM+0HKdGSIiIsUxzPiAPTNERETKY5jxgdwzw6dmExERKYZhxgd80CQREZHyGGZ8wAZgIiIi5THM+ECnOd8zw5EZIiIixTDM+IAjM0RERMpjmPEBG4CJiIiUxzDjAx3XmSEiIlIcw4wPOM1ERESkPIYZH3DRPCIiIuUxzPiAD5okIiJSHsOMDzjNREREpDyGGR/wQZNERETKY5jxgadnho8zICIiUg7DjA8urDPDMENERKQUhhkfsGeGiIhIeQwzPrgQZng3ExERkVIYZnwgP2iSIzNERESKYZjxgU6tBsAGYCIiIiUxzPhAyxWAiYiIFMcw44ML68ywZ4aIiEgpDDM+0PFuJiIiIsUxzPiAt2YTEREpj2HGB1quAExERKQ4hhkfcAVgIiIi5THM+EDHBmAiIiLFMcz4wDMy43ILuNwMNEREREpgmPGBp2cGYBMwERGRUhhmfOAZmQEYZoiIiJSieJg5ceIEHnvsMURHRyM0NBT9+vVDfn6+vF8IgczMTMTHx8NgMCA9PR0FBQUKVnyBzivMcJqJiIhICYqGmYqKCgwbNgxarRbffvst9uzZg3feeQeRkZHyMbNmzcLs2bMxd+5c5OXlwWKxYOTIkaisrFSu8PNUKgkaFR9pQEREpCSNkt/87bffRmJiIhYsWCBvS05Oln8thMCcOXMwc+ZMjBs3DgCwcOFCmM1mLFq0CE8//XSwS25Eq1bB6XZxrRkiIiKFKDoys2LFCgwaNAgPPvggYmNj0b9/f3z88cfy/sLCQpSWliIjI0PeptfrMWLECGzatOmy57Tb7bDZbF6vQOLDJomIiJSlaJg5cuQI5s2bh9TUVKxatQqTJ0/G1KlT8dlnnwEASktLAQBms9nrfWazWd53qezsbJhMJvmVmJgY0J+Ba80QEREpS9Ew43a7MWDAAGRlZaF///54+umn8dRTT2HevHlex0mS5PW1EKLRNo8ZM2bAarXKr+Li4oDVD/D5TEREREpTNMzExcWhR48eXtu6d++OoqIiAIDFYgGARqMwZWVljUZrPPR6PSIiIrxegeQJM3b2zBARESlC0TAzbNgw7N+/32vbgQMHkJSUBABISUmBxWJBTk6OvN/hcCA3NxdpaWlBrfVK2DNDRESkLEXvZnrxxReRlpaGrKwsPPTQQ9i6dSvmz5+P+fPnA2iYXpo2bRqysrKQmpqK1NRUZGVlITQ0FOPHj1eydBmnmYiIiJSlaJgZPHgwvvrqK8yYMQN/+MMfkJKSgjlz5mDChAnyMdOnT0dtbS2mTJmCiooKDBkyBKtXr4bRaFSw8gsuNAAzzBARESlBEkK06ttwbDYbTCYTrFZrQPpn/mveJuQfq8BfHxuIu3pZ/H5+IiKituh6Pr8Vf5xBS8eeGSIiImUxzPhIp1EDYJghIiJSCsOMj3QcmSEiIlIUw4yPPHczObgCMBERkSIYZnwk35rNRfOIiIgUwTDjI64zQ0REpCyGGR/pNA09Mw6OzBARESmCYcZHHJkhIiJSFsOMj9gATEREpCyGGR9xZIaIiEhZDDM+4jozREREymKY8RFHZoiIiJTFMOMj7fmnZjuc7JkhIiJSAsOMj3QcmSEiIlIUw4yPPCMzDDNERETKYJjxERuAiYiIlMUw4yOuM0NERKQshhkf8UGTREREymKY8dGFkRmGGSIiIiUwzPjI86BJ9swQEREpg2HGR/LIDKeZiIiIFMEw4yOuAExERKQshhkfXQgzvJuJiIhICQwzPuIKwERERMpimPGRjisAExERKYphxkfa8ysAswGYiIhIGQwzPmLPDBERkbIYZnzEaSYiIiJlMcz4yDMy43QLuN0cnSEiIgo2hhkfeXpmAD7SgIiISAkMMz7yjMwAnGoiIiJSAsOMj7zDDKeZiIiIgo1hxkdqlQS1ig+bJCIiUgrDjB9wrRkiIiLlMMz4AR82SUREpByGGT/QceE8IiIixSgaZjIzMyFJktfLYrHI+4UQyMzMRHx8PAwGA9LT01FQUKBgxZfHhfOIiIiUo/jITM+ePVFSUiK/du/eLe+bNWsWZs+ejblz5yIvLw8WiwUjR45EZWWlghU35plm4jozREREwadRvACNxms0xkMIgTlz5mDmzJkYN24cAGDhwoUwm81YtGgRnn766cuez263w263y1/bbLbAFH4RTwNwPRuAiYiIgk7xkZmDBw8iPj4eKSkpeOSRR3DkyBEAQGFhIUpLS5GRkSEfq9frMWLECGzatOmK58vOzobJZJJfiYmJAf8Z+LBJIiIi5SgaZoYMGYLPPvsMq1atwscff4zS0lKkpaXhzJkzKC0tBQCYzWav95jNZnnf5cyYMQNWq1V+FRcXB/RnAC70zDhcroB/LyIiIvKm6DTTqFGj5F/37t0bQ4cORefOnbFw4ULcdNNNAABJkrzeI4RotO1ier0eer0+MAVfgdwz4+TIDBERUbApPs10sbCwMPTu3RsHDx6U+2guHYUpKytrNFqjNLlnhg3AREREQdeswozdbsfevXsRFxeHlJQUWCwW5OTkyPsdDgdyc3ORlpamYJWNcdE8IiIi5Sg6zfTyyy9j7Nix6NixI8rKyvDHP/4RNpsNEydOhCRJmDZtGrKyspCamorU1FRkZWUhNDQU48ePV7LsRnQMM0RERIpRNMwcP34cjz76KMrLy9G+fXvcdNNN2Lx5M5KSkgAA06dPR21tLaZMmYKKigoMGTIEq1evhtFoVLLsRi6sM8OeGSIiomBTNMwsXrz4qvslSUJmZiYyMzODU1ATaT0rAHOdGSIioqBrVj0zLRWnmYiIiJTDMOMHOg3vZiIiIlIKw4wfsGeGiIhIOQwzfsBbs4mIiJTDMOMHF1YAZpghIiIKNoYZP9BxBWAiIiLFMMz4AaeZiIiIlMMw4weedWb4oEkiIqLgY5jxA47MEBERKYdhxg/YM0NERKQchhk/4MgMERGRchhm/ICL5hERESmHYcYPdHzQJBERkWIYZvyA00xERETKYZjxAz5okoiISDkMM37AnhkiIiLlMMz4wYVnM7kUroSIiKjtYZjxgws9MxyZISIiCjaGGT/QsQGYiIhIMQwzfqBlAzAREZFiGGb84ELPDMMMERFRsDHM+IGOPTNERESKYZjxAy6aR0REpByGGT/wPM7A6RZwuzk6Q0REFEwMM36gVUvyr+vdHJ0hIiIKJoYZP/BMMwHsmyEiIgo2hhk/8AozvKOJiIgoqBhm/ECtkqBWNUw1OdgETEREFFQMM37i6ZvhWjNERETBxTDjJ7w9m4iISBkMM37ChfOIiIiUwTDjJxyZISIiUgbDjJ94HjbJBmAiIqLgYpjxE3lkhg3AREREQcUw4yfsmSEiIlJGswkz2dnZkCQJ06ZNk7cJIZCZmYn4+HgYDAakp6ejoKBAuSKvwvN8JvbMEBERBVezCDN5eXmYP38++vTp47V91qxZmD17NubOnYu8vDxYLBaMHDkSlZWVClV6ZZ5pJvbMEBERBZfiYaaqqgoTJkzAxx9/jKioKHm7EAJz5szBzJkzMW7cOPTq1QsLFy5ETU0NFi1apGDFl+dZNI8jM0RERMGleJh59tlnMWbMGNxxxx1e2wsLC1FaWoqMjAx5m16vx4gRI7Bp06Yrns9ut8Nms3m9gkEemWEDMBERUVBplPzmixcvxvbt25GXl9doX2lpKQDAbDZ7bTebzTh27NgVz5mdnY033njDv4VeAx3XmSEiIlJEk0ZmFi5ciG+++Ub+evr06YiMjERaWtpVg8bFiouL8cILL+Dzzz9HSEjIFY+TJMnrayFEo20XmzFjBqxWq/wqLi6+pnp8daFnhnczERERBVOTwkxWVhYMBgMA4Mcff8TcuXMxa9YsxMTE4MUXX7ymc+Tn56OsrAwDBw6ERqOBRqNBbm4u3n//fWg0GnlExjNC41FWVtZotOZier0eERERXq9g0Gq4zgwREZESmjTNVFxcjC5dugAAli9fjgceeAC//vWvMWzYMKSnp1/TOW6//Xbs3r3ba9t///d/o1u3bnj11VfRqVMnWCwW5OTkoH///gAAh8OB3NxcvP32200pO6DYAExERKSMJoWZ8PBwnDlzBh07dsTq1avl0ZiQkBDU1tZe0zmMRiN69erltS0sLAzR0dHy9mnTpiErKwupqalITU1FVlYWQkNDMX78+KaUHVDsmSEiIlJGk8LMyJEj8atf/Qr9+/fHgQMHMGbMGABAQUEBkpOT/Vbc9OnTUVtbiylTpqCiogJDhgzB6tWrYTQa/fY9/IU9M0RERMpoUpj58MMP8dprr6G4uBhffvkloqOjATT0wTz66KNNLmb9+vVeX0uShMzMTGRmZjb5nMHCp2YTEREpo0lhJjIyEnPnzm20XYlbopsLHRuAiYiIFNGku5lWrlyJ77//Xv76ww8/RL9+/TB+/HhUVFT4rbiWRMcGYCIiIkU0Kcy88sor8sq6u3fvxm9+8xuMHj0aR44cwUsvveTXAlsK9swQEREpo0nTTIWFhejRowcA4Msvv8Tdd9+NrKwsbN++HaNHj/ZrgS2FZ50ZPs6AiIgouJo0MqPT6VBTUwMAWLNmjfz8pHbt2gXtWUjNDRuAiYiIlNGkkZnhw4fjpZdewrBhw7B161YsWbIEAHDgwAEkJCT4tcCWgj0zREREymjSyMzcuXOh0WjwxRdfYN68eejQoQMA4Ntvv8Vdd93l1wJbCo7MEBERKaNJIzMdO3bE119/3Wj7u+++63NBLRUbgImIiJTRpDADAC6XC8uXL8fevXshSRK6d++Oe++9F2q12p/1tRh80CQREZEymhRmDh06hNGjR+PEiRPo2rUrhBA4cOAAEhMT8c0336Bz587+rrPZY88MERGRMprUMzN16lR07twZxcXF2L59O3bs2IGioiKkpKRg6tSp/q6xRWDPDBERkTKaNDKTm5uLzZs3o127dvK26OhovPXWWxg2bJjfimtJ2DNDRESkjCaNzOj1elRWVjbaXlVVBZ1O53NRLZH8bCaOzBAREQVVk8LM3XffjV//+tfYsmULhBAQQmDz5s2YPHky7rnnHn/X2CJwmomIiEgZTQoz77//Pjp37oyhQ4ciJCQEISEhSEtLQ5cuXTBnzhw/l9gy6NS8m4mIiEgJTeqZiYyMxL/+9S8cOnQIe/fuhRACPXr0QJcuXfxdX4uh1TTczeTgyAwREVFQXXOY+aWnYa9fv17+9ezZs5tcUEslNwBzZIaIiCiorjnM7Nix45qOkySpycW0ZPI0E+9mIiIiCqprDjPr1q0LZB0tHhuAiYiIlNGkBmBqTHt+BWCnW8Dt5ugMERFRsDDM+Inn2UwAUO/m6AwREVGwMMz4iadnBmDfDBERUTAxzPiJ9uIwwzuaiIiIgoZhxk/UKglqFZ+cTUREFGwMM37kaQLmwnlERETBwzDjR1quNUNERBR0DDN+ZNCqAQBVdU6FKyEiImo7GGb8KCHKAAAoOlujcCVERERtB8OMHyVHhwEAjp6pVrgSIiKitoNhxo+SzoeZYwwzREREQcMw40fJMaEAgKNnOM1EREQULAwzfpTMkRkiIqKgY5jxI0+YOWWzo8bBO5qIiIiCgWHGj0yhWkSGagEAxzjVREREFBQMM37GJmAiIqLgUjTMzJs3D3369EFERAQiIiIwdOhQfPvtt/J+IQQyMzMRHx8Pg8GA9PR0FBQUKFjxL0uOZhMwERFRMCkaZhISEvDWW29h27Zt2LZtG2677Tbce++9cmCZNWsWZs+ejblz5yIvLw8WiwUjR45EZWWlkmVfFZuAiYiIgkvRMDN27FiMHj0aN9xwA2644Qb86U9/Qnh4ODZv3gwhBObMmYOZM2di3Lhx6NWrFxYuXIiamhosWrRIybKvynN7dmE5wwwREVEwNJueGZfLhcWLF6O6uhpDhw5FYWEhSktLkZGRIR+j1+sxYsQIbNq06YrnsdvtsNlsXq9gutAzw2kmIiKiYFA8zOzevRvh4eHQ6/WYPHkyvvrqK/To0QOlpaUAALPZ7HW82WyW911OdnY2TCaT/EpMTAxo/ZfyTDOVWOtQV+8K6vcmIiJqixQPM127dsXOnTuxefNmPPPMM5g4cSL27Nkj75ckyet4IUSjbRebMWMGrFar/CouLg5Y7ZcTFaqFMUQDgA+cJCIiCgbFw4xOp0OXLl0waNAgZGdno2/fvnjvvfdgsVgAoNEoTFlZWaPRmovp9Xr57ijPK5gkSbrwwEn2zRAREQWc4mHmUkII2O12pKSkwGKxICcnR97ncDiQm5uLtLQ0BSv8ZUnnb89m3wwREVHgaZT85r/73e8watQoJCYmorKyEosXL8b69euxcuVKSJKEadOmISsrC6mpqUhNTUVWVhZCQ0Mxfvx4Jcv+RSkxDSMzhbw9m4iIKOAUDTOnTp3C448/jpKSEphMJvTp0wcrV67EyJEjAQDTp09HbW0tpkyZgoqKCgwZMgSrV6+G0WhUsuxfxFWAiYiIgkcSQgiliwgkm80Gk8kEq9UatP6ZbUfP4oG//ogOkQb88NvbgvI9iYiIWpPr+fxudj0zrYFnZOaktRZ2J2/PJiIiCiSGmQCICdchTKeGEEDx2VqlyyEiImrVGGYCQJIk9s0QEREFCcNMgPAZTURERMHBMBMgyXxGExERUVAwzASIvAowp5mIiIgCimEmQLgKMBERUXAwzARI8vlVgI9X1MDhdCtcDRERUevFMBMgsUY9QrQquAVw4hxvzyYiIgoUhpkA4dOziYiIgoNhJoDYBExERBR4DDMBlBTDJmAiIqJAY5gJII7MEBERBR7DTABZTCEAgFM2u8KVEBERtV4MMwEUa9QDAE5X1ilcCRERUevFMBNAscaGkZkz1Q44XVxrhoiIKBAYZgIoOkwHtUqCEEB5lUPpcoiIiFolhpkAUqkkxITrAACnbJxqIiIiCgSGmQAzRzRMNZVVsgmYiIgoEBhmAszTBFzGJmAiIqKAYJgJsPbnm4DLeHs2ERFRQDDMBNiFkRmGGSIiokBgmAmw2AiuNUNERBRIDDMB5llrhiMzREREgcEwE2CeaSbemk1ERBQYDDMB5rk1u7zKAZdbKFwNERFR68MwE2Ax4TpIEuByC5yt5irARERE/sYwE2AatQrRYQ2rAHOtGSIiIv9jmAmC9mwCJiIiChiGmSDwNAGf5sJ5REREfscwEwR8pAEREVHgMMwEgWfhPE4zERER+R/DTBB4bs/mWjNERET+xzATBHw+ExERUeAwzAQBn5xNREQUOAwzQSDfzVRphxBcBZiIiMifFA0z2dnZGDx4MIxGI2JjY3Hfffdh//79XscIIZCZmYn4+HgYDAakp6ejoKBAoYqbpv35MONwuWGtrVe4GiIiotZF0TCTm5uLZ599Fps3b0ZOTg6cTicyMjJQXV0tHzNr1izMnj0bc+fORV5eHiwWC0aOHInKykoFK78+IVo1TAYtAPbNEBER+ZtGyW++cuVKr68XLFiA2NhY5Ofn45ZbboEQAnPmzMHMmTMxbtw4AMDChQthNpuxaNEiPP3000qU3SSxRj2stfUos9lxg9modDlEREStRrPqmbFarQCAdu3aAQAKCwtRWlqKjIwM+Ri9Xo8RI0Zg06ZNlz2H3W6HzWbzejUHvD2biIgoMJpNmBFC4KWXXsLw4cPRq1cvAEBpaSkAwGw2ex1rNpvlfZfKzs6GyWSSX4mJiYEt/Brx9mwiIqLAaDZh5rnnnsOuXbvwz3/+s9E+SZK8vhZCNNrmMWPGDFitVvlVXFwckHqvV/sIPtKAiIgoEBTtmfF4/vnnsWLFCmzYsAEJCQnydovFAqBhhCYuLk7eXlZW1mi0xkOv10Ov1we24CaI5ZOziYiIAkLRkRkhBJ577jksW7YM3333HVJSUrz2p6SkwGKxICcnR97mcDiQm5uLtLS0YJfrEz45m4iIKDAUHZl59tlnsWjRIvzrX/+C0WiU+2BMJhMMBgMkScK0adOQlZWF1NRUpKamIisrC6GhoRg/frySpV83PjmbiIgoMBQNM/PmzQMApKene21fsGABJk2aBACYPn06amtrMWXKFFRUVGDIkCFYvXo1jMaWdXtzbMSFaaar9fwQERHR9ZFEK19f32azwWQywWq1IiIiQrE6qu1O9Hx9FQBgd2YGjCFaxWohIiJq7q7n87vZ3M3U2oXpNQjXNwyEsQmYiIjIfxhmgkjum2ETMBERkd8wzARRezYBExER+R3DTBB5moBPc5qJiIjIbxhmgoiPNCAiIvI/hpkgutAzw2kmIiIif2GYCaILT87myAwREZG/MMwEEVcBJiIi8j+GmSCKjWDPDBERkb8xzARR+/NPzq6sc6Ku3qVwNURERK0Dw0wQRYRooNc0XHIunEdEROQfDDNBJEmSPNV0in0zREREfsEwE2QJkaEAgKPl1QpXQkRE1DowzARZV4sRALC/tFLhSoiIiFoHhpkgk8PMKYYZIiIif2CYCTKOzBAREfkXw0yQ3WBuCDNllXZUVDsUroaIiKjlY5gJsnC9BglRBgCcaiIiIvIHhhkFdONUExERkd8wzCjA0zezj2GGiIjIZwwzCvD0zRzgNBMREZHPGGYU0M0SAQA4UFoJIYTC1RAREbVsDDMKSIkJg0YlodLuxIlztUqXQ0RE1KIxzChAp1Ghc/twAJxqIiIi8hXDjELYBExEROQfDDMK4UrARERE/sEwo5CuZoYZIiIif2CYUYhnZObw6SrUu9wKV0NERNRyMcwopEOkAWE6NepdAkfLq5Uuh4iIqMVimFGISiXhBjYBExER+YxhRkF8RhMREZHvGGYU5HmsgT+enu12Czz+yRY89rctcLm5qjAREbUdGqULaMv8eXv2kfJqbDxYDgAoOluDlJgwn89JRETUEnBkRkGe27OLztag2u706Vw/FZ+Tf32orMqncxEREbUkDDMKig7XIyZcDwA46GMA2XlRmDlYxh4cIiJqOxQNMxs2bMDYsWMRHx8PSZKwfPlyr/1CCGRmZiI+Ph4GgwHp6ekoKChQptgAudAEbPPpPD8dPyf/+tApjswQEVHboWiYqa6uRt++fTF37tzL7p81axZmz56NuXPnIi8vDxaLBSNHjkRlZesZefDHM5rq6l3YW3IhDB06zTBDRERth6INwKNGjcKoUaMuu08IgTlz5mDmzJkYN24cAGDhwoUwm81YtGgRnn766WCWGjD+eKzB3hIb6l0CapUEl1vgUFkV3G4BlUryV5lERETNVrPtmSksLERpaSkyMjLkbXq9HiNGjMCmTZuu+D673Q6bzeb1as76JkYCALYdrcC5GkeTzuFp/h3WJQZatYQahwsnrbV+qpCIiKh5a7ZhprS0FABgNpu9tpvNZnnf5WRnZ8NkMsmvxMTEgNbpq64WI3rERcDhcuNfO0826Rw/HbcCAAZ2jJJvyfa1oZiIiKilaLZhxkOSvKdKhBCNtl1sxowZsFqt8qu4uDjQJfrsoUEJAID/t61ptXpGZvommpAae/4BlgwzRETURjTbMGOxWACg0ShMWVlZo9Gai+n1ekRERHi9mrt7+3WATq1CwUkbfj5hva73WmvqceT8gyr7JkSic2w4AOAg72giIqI2otmGmZSUFFgsFuTk5MjbHA4HcnNzkZaWpmBl/hcVpsPIng0B7Yv849f1Xs8t2UnRoYgK0yHVE2a41gwREbURioaZqqoq7Ny5Ezt37gTQ0PS7c+dOFBUVQZIkTJs2DVlZWfjqq6/w888/Y9KkSQgNDcX48eOVLDsgHhzYMNW0fOcJ2J2ua36fPMWUEAkASDU3hJlDZVUQgs9oIiKi1k/RW7O3bduGW2+9Vf76pZdeAgBMnDgRn376KaZPn47a2lpMmTIFFRUVGDJkCFavXg2j0ahUyQFzc2p7WCJCUGqrw5o9ZRjTJ85rv62uHhqVhFCd9/8yz8iM566olJgwqCTAVufE6Uo7YiNCglE+ERGRYhQdmUlPT4cQotHr008/BdDQ/JuZmYmSkhLU1dUhNzcXvXr1UrLkgFGrJDww8PKNwKsKSjHoj2sw9oPvUeu4MGojhMDO4oYem37nw4xeo0ZSNO9oIiKitqPZ9sy0RZ4ws+HgaZw817BOzBf5x/HM5/lwON04fLoa89Yfko8/aa1DeZUdGpWEnvEXGp27xF6YaiIiImrtGGaakeSYMAxJaQchgGXbj+P/vi/Ey0t/glsAA5OiAAB/3XAERWdqAFzol+kWZ0SIVi2fpwubgImIqA1hmGlmHhrUsMjfh+sO4w9f7wEA/Gp4CpY+PRRpnaPhcLrx5jcN23de0vzrkcrbs4mIqA1hmGlmRvW2IFyvQW19Q2/Myxk3YOaY7lCpJGTe0xNqlYScPaeQe+D0hTBzvl/GQ144jw+cJCKiNoBhppkJ1Wnw+NAkaNUS3ry3J567LVVe8fgGsxEThyYDAN5YUSAvsNfvkjDTObahAbi8yoGz1U173hMREVFLwTDTDL16VzfszrwTj58PLhebNjIVMeE6HCmvRo3DhTCdGp3bh3sdE6rToEOkAQCbgImIqPVjmGmmLm7ovVhEiBbT7+omf907wQS1qvGzqi5ePI+IiKg1Y5hpgR4YkCBPLQ3oGHXZY7q05x1NRETUNii6AjA1jUol4aMJA7AkrxiT0pIvewxHZoiIqK1gmGmh4iMNeHHkDVfc3+X8HU28PZuIiFo7TjO1Up6F80ptdaisqw/I93C63AE5LxER0fVgmGmlTAYtYo16ABemmorP1uD/yz2Mv208Are76U/Urqt3Yeo/d6DH66vwbs4B1DPUEBGRgjjN1IqlmsNRVmnH/A1HUGKtkxfZA4CYcD3u69/hus9pq6vHUwu3YUvhWQDAe2sPYv3+Msx+uF+jW8SJiIiCgSMzrZjnjqZvfy7FzuJzUElAp5iGBfXeXrnP6wnc1+KUrQ4P/fVHbCk8i3C9Bi+NvAERIRr8dNyKMe9vxMJNR30a8SEiImoKhplWbGzfeESFajE4OQpv3NMTm393O/7zws3oEGlAibUO8zccueZzHTldhXEfbcK+0kq0N+qx5OmbMPX2VKx68RbcnBqDuno3Xl9RgN8s/emK5xBCIHNFAX775S44nJyaIiIi/5CEEK36n9I2mw0mkwlWqxURERFKl9MsfLOrBM8u2g6DVo11L6fDYgq56vH5x87iqc/ycbbagZSYMHz25I1IbBcq73e7Bf6++Rje+HcB3AJY93I6Us6PAF1sR1EF7v9oEwBg3IAOeOfBvvKjGoiIiC52PZ/fHJlpg0b3tmBwchRq612YtWrfVY/9184TePTjLThb7UCfBBO+mDzUK8gADeveTExLxs2p7QEAX+04cdlzLdt+wuvXc9Yc9PEnISIiYphpkyRJwu/v7gGgIVT8dFFjsIcQAnPWHMALi3fC4XQjo4cZi399E6LD9Vc877gBDQ3FX+04jksH/OxOF/696yQA4P7zjcfvrT2IpduK/fEjERFRG8Yw00b1SYiUw8ebX+/xCh+1DhdeXLJTHjl5ekQn/PWxgQjVXf3mt4weFoTp1Cg+W4ttxyq89q3bdxrnauphjtDjLw/2xTPpnQEAM5btxg+Hyv35oxERURvDW7PbsOl3dsO3u0ux7VgFHpm/GVV2J07Z6lBe5QAAaFQS/nhfLzxyY8drOp9Bp8ao3nH4Iv84lm0/gcHJ7eR9X+04DgC4r38HqFUSXsnoiuMVtfj3Tycx+e/5eOWurhjQMQrdLEZo1MzYRER07Rhm2jCLKQTPpHfG7JwD8roxHrFGPeY83A9pXWKu65zj+nfAF/nH8c2uk3h9bA+EaNWoqHbgu31l5/cnAGjos/nzA31Qaq1F3tEK/O+/CgAABq0avRNMGJLSDnf1sqBHXASbhImI6KoYZtq4Z9I7IzpcBwkSLCY9LBEGWEwhiArVNilE3NQpGnGmEJRY6/DdvjKM7h2Hr3edRL1LoFeHCHS1GOVjQ7RqfDJpMBb+cBRbj57FzuJzqKxzYmvhWWwtPIsPvjuE5OhQjOodhzG949AjLgIqlXdNQgjkHa3A/9tWjFUFpYgJ1yOjhxkZPS3onxjZ6PirEULg2JkabCk8A5NBizu6m5vlKFGptQ7vrT0IjUrC1NtT0d545T6m61VWWYclW4uhUknoER+BHnERiDXq23Sg9EzBtuVrQNTc8dZs8ru3vt2Hv+Yexh3dzfjbxEG478MfsLP4HP737h54cnjKFd/ndgscPl2F7UUVWLfvNNbtL4P9ovVownRqpJqN6Go2oqvFiNp6F5ZuK8bRMzWXPV97ox6jelnw4h03ICpMd9lj6upd+M/uEvxw6Ax+PFyOk9Y6eV+nmDA8f3sXjO0T3yxCTV29C598X4gP1x1CzfkFD40hGrxyZ1dMGJIE9XUEt0tV2534eOMRzN9wRD63R3SYDn0TI/HsrV0wMCnKp5/hWtidLmhVqusKooFy4lwtJv89H0Vna3B791jc1dOCW25ojxCt2us4IQSEwFVr/qn4HH48cgYZPczo5MfVst1ugY/WH8LBsip0s0TIIdQTcoUQqHa4YK2thzFEg4gQrd++N127KrsTBq3apz+n/vTdvlPIO1qBRwYnIim68VIazcH1fH4zzJDfHThViYx3N0CjkrDk6ZvwX/N+hFolYcvvbkfMVe6GulS13Ynv9pXhP7tLsG5/GerqL7/QXphOjbv7xGPcgA4or3JgVUEp1u0rQ6XdCQBIiQnDJxMHNfoAOVpejcmf52NfaaW8TauW0C8xEofKqlBRUy+//7lbu2B07zgYdN4fYtfjdKUdmw6XI0SrRpfYcHRsFwrtNYQkIQTW7C3Dm1/vQdHZhuA2MCkKdqcLP5+wAQB6xkfgD/f2uu6w4XC6sTS/GO/mHER5lR0A0DcxEh3bhWJviQ1HTlfh4kWd/2tAAl4d1RWxxquvTXQ5J8/VIv9YBW5MaQdzROP31zicmLVyP/6++RiMIRoMSWmHmzpF46ZO0ejYLhSF5dU4fLoKR05X4+iZakQatEg1G3GD2YgbzOGIDL18YG2qn4rP4X8WbpOvi4dBq8bNqTHQqCWcstlxylaHMpsdoXo1pqR3xsS0ZOg1F36f2J0uvJtzEPM3HJav5aCkKDw0KBGj+8QhXN/0AXKny43pX+zCsssshxAdpoMAYKuth/P8N9aqJfwmoyueurmT3z5U6+pdWL3nFNxugVu7xsIU2rSwdKisEv/YUgRbrRP/O7YHTIarn0cIAYfLjVqHCzUOFyQJ0GvU0GtU0GlU0KikK46m1dW7sKqgFNuPVWBwSjuM7GH2+n/mT3anC39dfwQfrj+EhCgDPni0P3rGm5p0Ls/fBcVna5BqDkc3S8R1j8za6urxxoo9+HJ7Qx+jWiXh3n7xeO7WLn4N2f7AMHMRhhll3P3BRvx8woYOkQacOFeL27rF4v8mDW7y+epdbhwtr8b+U5U4UFqJfaWVcLjcGNM7DqN7xyHskg8Eh9ONHw6V47XlP+PEuVqYDFrMe2wA0jo39ACt3XsK05bsRGWdEzHhejwwMAHDukRjUFI7GHRqVNud+OzHY5i/4bAcanRqFfp1jMTQTtEY2jkaN5iNcDjdsDtdqKt3w+F0Q6OWoNeoEKJVI0SrRqm1Dmv3nsLafWX46fg5XPynTauWkBQdhq4WI+7r1wG3dm3vNQIkhMD3h8rx/tqDyDvacHeYOUKP343ujnv6xsMtgEVbi/Dnlftgq2sIbiFaFYwhWhhDNDCGaGEyaNEuVIt2YXq0C9PCGKJFibXufCiowrEzNfIHXVJ0KF69qxtG9bLIHwK1Dhf2n6rEPzYfw9L8hr/8jHoNXrgjFRPTkn8xjDmcbqzdewqL84qx4eBpCAHoNSpMGJKEyemd5FC06VA5Xl22C8Vna5v0+wMAokK1CNNrYNCqYdCpYdCq8cDABDw4KPG6z7Xy5xJMW7ITdfVudLMY8cqdXfHDoTNYVVCKE+euXmNSdCh+N7o7MnqY8fMJG36zdCcOnGp42GvP+AjsLbHJoSZUp8bApCgkRIUiIcqAhCgDzBEhcAsBp0ug3uVGvcuNpOgwdI/z/vurrt6F5/+5Azl7TkGtkjBxaDLKKuuwp8SGwvJqXPo3u0Ylyf+vh6S0wzsP9UVC1IU1o+xOF74/WI7dJ6xQSRLUKgkalQSNWoUOkQb07xjpFULPVNnx983H8Pcfj+FMdcNNA1q1hGFdYjC6VxxG9jBfcUTUo97lRs6eU/j7j8fw45Ez8vaBSVH47MkbG/25drrcyP52H77IP44quxOuqzw+Ra9RoXcHEwYmRWFAUhQGdIzC0TPV+DL/OL7ZVSL/Ywdo+L1zf/8EPDw40Wsq3Fd5R89ixrLd8sN+gYa/R2aM7oZJacnXNXVZcNKKzBUF8t8FHtFhOnS1GHFzanuM6R2HjtGhVzhDw5+zl5f+hJPWOkhSw12tnqU5VBJwT9949OpgQom1DqXWOpRYa1HjcKFvQiRu6twOQ1KiER9pANAw0rS3xIaCE1bsKbFhVO843No19jquzi9jmLkIw4wyPvm+EG9+vUf+eu74/ri7T3zQ6zhdacev/74NO4rOyXdnnbTW4f21DbedD0yKwkcTBlx2pACAHGo++/EoSi6agmqqnvERUEkSDp+uajSdE2cKwUODEvHw4ETsL63Ee2sPyg8H1WlU+NXwFDx7a5dGf8GfqbLj7ZX7sDT/eKMPsGsRE67Dc7d2wfghSdBprhxOthdVIHNFAXYdtwIAOkQazoeFBK8PxXqXG3mFZ5Gz9xRW7Dwpf9ABQGI7gxxYQrQqPDE0GZV1Tvxza5F8zj/e3wsmgxabj5zB5iNnse3oWdQ4XIgK1aJLbDg6tw9HUnQYKmocOHCqEgdPVV01YHz+P0MwPPXyjexFZ2rw80krDDo1Qs+HoI0Hy/HnVfsBAOld2+ODR/vDeH5qRgiBn0/Y8P2hchi0KpgjQhAbEYJYox4/HjmDP6/aj9OVDSM5vTpEYG9JJVxugZhwHbLu742MnhacstVh2fYTWLqtGEfKq3/x/49H38RIPDakI8b2jYfTLfDUwm348cgZ6DQqfDR+AO7oYZaPrbY7ceR0NXQaFUyGhlAbolVh6bbjeOPfBah2uGDUa/DGvT3RLkyHr3eVYFVBKSrrnFepAIg3haBfx0gYtBp8veukPA3cIdKAML1aDm0AIEkNwdcUqkWkQQeTQQutWkKNw4W6ehdq6104XWmX/7GgkoDbusUi72gFrLX1SOscjf+bNFie0quyO/Hcou1Yv/90o7p0ahUEBOpd1/4HICHKgOFdYrB+/2mU2i782e7YLhTR4TpEheoQadAiMlSHEK0KWnXDiI9O3TAFKoSAyy3gOj/FqNeoEKbXIFSnRphOg7X7yuTf1zHhOky/qxtWF5zCmr2nAAB3dI/FrAf6IlSnxuHTVdhfWokDp6qgUUm4wdIwnZ4SE4YahxPvrD6Af2w5BrdoGBlM6xyNI+UNI5SX/pnvk2DCmN5xGJQchSp7w/SitbYeBSesWJzXsK5XUnQo3nmwLwYlt8NPxefwwXeH5Lp+Scd2oVCrpEbf+1fDU/Da+fXL/IVh5iIMM8o4XWnHTdlr4XILGEM0yJt5R6M+g2Cpq3fhlS924d8/nfTa/sTQJLw2psdVP8A9hBA4eqYGPx4+gx+PnMGPh8+gvMoOjUpCiPbC0Ha9S8Be70Kd04V6l0CIVoXhXWJwe3czbusWK4cmt1ug1FaHQ2VV+P5QOb7IP46zF33oe4RoVRh/YxKeHtHpioHLo8ruREW1A7a6elTWOVFZ58S5Ggcqahw4W12PimoHztU6YI4IQef24ejUPgyd24fDEhFyzf0pbrfA0vxizFq5Xw4pkgQM7xKDW7vGYntRBXIPnPb6UGxv1OPBgQl4aFAikqJDsfFgOWbnHPB6ijsAPHZTR/x2VPdG0y71Ljdq7K6rTl9U1tXjeEUtautdqHM0fFCu+Okk/rXzJOJMIVg57ZZG0xbfHyzHpAVb5dGKS01KS8ZrY7pfV79Uld2JeesP4eONhfLzx8b0icOb9/ZCu0tGKYQQ2HXcigOnKlFcUYvjFTU4XlGL8kp7w6iIWgWdumGqpOCkVf6gjgjRIMaox5HT1QjXa/DxE4MwtHP0Ndd47Ew1XlyyE9uLzjXaF2vU45Yb2kOrVsHldsPpFnA43ThUVoUDpypx6aXqm2DCr27uhFG9LNCoVThUVoWVP5fgP7tLsafEdk31xITr8eiNiXjkxo7oEGnAzuJzmPDxZlQ7XLijeyzmPTYQZ6sdePLTPBSctCFEq8KsB/rixuR2CNU3hFDP/yO3u2HqyV7vRnm1HTuKziH/2FnkH6vAgVNVCNWpMbp3HP5rQAKGpLSDSiXB5RbYcOA0luQVY83eU1f8/dBUDw9KxIzR3RAZqoMQAp/9eAx/+mYvHC43QnVq1NW7Gl1XD41Kgk6jkv/hc3efOPxudHd5dKTW4cLBskr8VHwO3/5cis1HzlzxXB6P3dQRM0Z1b/SPop9PWPHZj0dRV+9GnCkE5ogQxJlCoFGrsO3YWWw+cha7j5/zOr8lIgQ94yPQMz4Cw1Pb48aUdvAnhpmLMMwo578XbMW6/afxyOBEvPVffRStRQiBd9ccxPtrD0KvUSF7XG+MG5Dg0/lcbnHVDzrPEPi19CbYnS6s/LkUi7YUYUvhWRi0ajwxNAm/urmTX+9W8hdPz8GSvGJsOnym0f7oMB1u7RaLO3taGk2fAQ3Xb/3+03j/u4OosbuQeU/P6/pAvhY1DidGvbcRx87UYFz/Dpj9cD95375SGx6c9yMq7U50ah+GUJ26YcTA4YIkSZg8ohMeH5rc5O9dfLYGn/14FAOTonBXrziff5bTlXYszS/Goi1FOF7RMAoVFarFwidvRJ+EyOs+n9PlxkfrD+P9tQcRGarFqF5xuLtPHAYnt7tisK22O7HruBU7i8/hlK0Oo3vHYXBy1BWnSiqqHThb48C5mnpYaxv+63QJeQowVKdGmF6D7nERjf5BsfnIGUz8v62wO924vVss9pTYUGKtQ0y4Dn+bOBj9Eq//Z66sq4dWrbrqP6rOVjtw8FQlztXWn/+HQD3O1dTD7nSh3tUwlVzvEnC6BdQSoJIkqFQSVBJgd7pRbXeh2u5EjcOJML0GU29PxU2dGv++LjhpxfP/3IEjpxtG5iJDteh6vv/L6RY4cH463TMV1tVsxOv39JCnya+kvMqOlT+X4j+7S3DsTA0iDFqYDBqYDFpEhepwd5/4K45SXus13F50DhIaRpmvtiK8PzDMXIRhRjlHy6vx6aajePbWLs3mA3nX8XOICtU1er5Uc1Jmq4NBp5anNpq7ojM1WJpfjO1FFeiXGInbupnRLzGyWdy1kX/sLB78649wC2DehAEY1TsOpdY63P/RDyix1mFISjt89j83Bqz5099cboENB0/jh4PleHRIR3T2sWGzsq4eoTpNs/h/dan1+8vw1Gfb5BGpTu3D8OmkG6/aE9KS1NW7sK+0EvGmELS/zPIHQgiUWOtQXmVHj7iIZnFHZbAxzFyEYYaobZu1ch8+Wn8YUaFaLJsyDFP+sR17S2zo3D4My54Z1uS7byjwPI3YAzo29Lb5+441at4YZi7CMEPUtjmcbtz74Q/YW9LQb1FX70ZMuA5fTRnWrEfoqEFdvUuxfjtS1vV8fre9cSsialN0GhXefbgvdOqGIGPQqvHJxMEMMi0EgwxdC4YZImr1ulki8Kf7e6Fz+zB8NGEA+jahgZSImi8+m4mI2oQHByU2aQE9Imr+ODJDRERELRrDDBEREbVoLSLMfPTRR0hJSUFISAgGDhyIjRs3Kl0SERERNRPNPswsWbIE06ZNw8yZM7Fjxw7cfPPNGDVqFIqKipQujYiIiJqBZr/OzJAhQzBgwADMmzdP3ta9e3fcd999yM7ObnS83W6H3W6Xv7bZbEhMTOQ6M0RERC1Iq1lnxuFwID8/HxkZGV7bMzIysGnTpsu+Jzs7GyaTSX4lJvLuBSIiotasWYeZ8vJyuFwumM1mr+1msxmlpaWXfc+MGTNgtVrlV3FxcTBKJSIiIoW0iHVmLvcAris9qVWv10Ovbx4PNSQiIqLAa9YjMzExMVCr1Y1GYcrKyhqN1hAREVHb1KzDjE6nw8CBA5GTk+O1PScnB2lpaQpVRURERM1Js59meumll/D4449j0KBBGDp0KObPn4+ioiJMnjxZ6dKIiIioGWj2Yebhhx/GmTNn8Ic//AElJSXo1asX/vOf/yApKUnp0oiIiKgZaPbrzPjqeu5TJyIiouah1awzQ0RERPRLmv00k688A082m03hSoiIiOhaeT63r2UCqdWHmcrKSgDgSsBEREQtUGVlJUwm01WPafU9M263GydPnoTRaLziQntN5XnuU3FxMftxAozXOnh4rYOH1zp4eK2Dx1/XWgiByspKxMfHQ6W6eldMqx+ZUalUSEhICOj3iIiI4B+OIOG1Dh5e6+DhtQ4eXuvg8ce1/qURGQ82ABMREVGLxjBDRERELRrDjA/0ej1ef/11PtgyCHitg4fXOnh4rYOH1zp4lLjWrb4BmIiIiFo3jswQERFRi8YwQ0RERC0awwwRERG1aAwzRERE1KIxzDTRRx99hJSUFISEhGDgwIHYuHGj0iW1eNnZ2Rg8eDCMRiNiY2Nx3333Yf/+/V7HCCGQmZmJ+Ph4GAwGpKeno6CgQKGKW4/s7GxIkoRp06bJ23it/efEiRN47LHHEB0djdDQUPTr1w/5+fnyfl5r/3A6nXjttdeQkpICg8GATp064Q9/+APcbrd8DK9102zYsAFjx45FfHw8JEnC8uXLvfZfy3W12+14/vnnERMTg7CwMNxzzz04fvy4fwoUdN0WL14stFqt+Pjjj8WePXvECy+8IMLCwsSxY8eULq1Fu/POO8WCBQvEzz//LHbu3CnGjBkjOnbsKKqqquRj3nrrLWE0GsWXX34pdu/eLR5++GERFxcnbDabgpW3bFu3bhXJycmiT58+4oUXXpC381r7x9mzZ0VSUpKYNGmS2LJliygsLBRr1qwRhw4dko/htfaPP/7xjyI6Olp8/fXXorCwUCxdulSEh4eLOXPmyMfwWjfNf/7zHzFz5kzx5ZdfCgDiq6++8tp/Ldd18uTJokOHDiInJ0ds375d3HrrraJv377C6XT6XB/DTBPceOONYvLkyV7bunXrJn77298qVFHrVFZWJgCI3NxcIYQQbrdbWCwW8dZbb8nH1NXVCZPJJP76178qVWaLVllZKVJTU0VOTo4YMWKEHGZ4rf3n1VdfFcOHD7/ifl5r/xkzZox48sknvbaNGzdOPPbYY0IIXmt/uTTMXMt1PXfunNBqtWLx4sXyMSdOnBAqlUqsXLnS55o4zXSdHA4H8vPzkZGR4bU9IyMDmzZtUqiq1slqtQIA2rVrBwAoLCxEaWmp17XX6/UYMWIEr30TPfvssxgzZgzuuOMOr+281v6zYsUKDBo0CA8++CBiY2PRv39/fPzxx/J+Xmv/GT58ONauXYsDBw4AAH766Sd8//33GD16NABe60C5luuan5+P+vp6r2Pi4+PRq1cvv1z7Vv+gSX8rLy+Hy+WC2Wz22m42m1FaWqpQVa2PEAIvvfQShg8fjl69egGAfH0vd+2PHTsW9BpbusWLF2P79u3Iy8trtI/X2n+OHDmCefPm4aWXXsLvfvc7bN26FVOnToVer8cTTzzBa+1Hr776KqxWK7p16wa1Wg2Xy4U//elPePTRRwHw93WgXMt1LS0thU6nQ1RUVKNj/PHZyTDTRJIkeX0thGi0jZruueeew65du/D999832sdr77vi4mK88MILWL16NUJCQq54HK+179xuNwYNGoSsrCwAQP/+/VFQUIB58+bhiSeekI/jtfbdkiVL8Pnnn2PRokXo2bMndu7ciWnTpiE+Ph4TJ06Uj+O1DoymXFd/XXtOM12nmJgYqNXqRkmyrKysUSqlpnn++eexYsUKrFu3DgkJCfJ2i8UCALz2fpCfn4+ysjIMHDgQGo0GGo0Gubm5eP/996HRaOTryWvtu7i4OPTo0cNrW/fu3VFUVASAv6/96ZVXXsFvf/tbPPLII+jduzcef/xxvPjii8jOzgbAax0o13JdLRYLHA4HKioqrniMLxhmrpNOp8PAgQORk5PjtT0nJwdpaWkKVdU6CCHw3HPPYdmyZfjuu++QkpLitT8lJQUWi8Xr2jscDuTm5vLaX6fbb78du3fvxs6dO+XXoEGDMGHCBOzcuROdOnXitfaTYcOGNVpi4MCBA0hKSgLA39f+VFNTA5XK+2NNrVbLt2bzWgfGtVzXgQMHQqvVeh1TUlKCn3/+2T/X3ucW4jbIc2v2J598Ivbs2SOmTZsmwsLCxNGjR5UurUV75plnhMlkEuvXrxclJSXyq6amRj7mrbfeEiaTSSxbtkzs3r1bPProo7yt0k8uvptJCF5rf9m6davQaDTiT3/6kzh48KD4xz/+IUJDQ8Xnn38uH8Nr7R8TJ04UHTp0kG/NXrZsmYiJiRHTp0+Xj+G1bprKykqxY8cOsWPHDgFAzJ49W+zYsUNekuRaruvkyZNFQkKCWLNmjdi+fbu47bbbeGu20j788EORlJQkdDqdGDBggHz7MDUdgMu+FixYIB/jdrvF66+/LiwWi9Dr9eKWW24Ru3fvVq7oVuTSMMNr7T///ve/Ra9evYRerxfdunUT8+fP99rPa+0fNptNvPDCC6Jjx44iJCREdOrUScycOVPY7Xb5GF7rplm3bt1l/36eOHGiEOLarmttba147rnnRLt27YTBYBB33323KCoq8kt9khBC+D6+Q0RERKQM9swQERFRi8YwQ0RERC0awwwRERG1aAwzRERE1KIxzBAREVGLxjBDRERELRrDDBEREbVoDDNERETUojHMEJHfJCcnY86cOdd8/Pr16yFJEs6dOxewmpqT670+RHRtNEoXQETKSU9PR79+/fz2AZuXl4ewsLBrPj4tLQ0lJSUwmUx++f5E1DYxzBDRVQkh4HK5oNH88l8X7du3v65z63Q6WCyWppZGRASA00xEbdakSZOQm5uL9957D5IkQZIkHD16VJ76WbVqFQYNGgS9Xo+NGzfi8OHDuPfee2E2mxEeHo7BgwdjzZo1Xue8dBpFkiT87W9/w/3334/Q0FCkpqZixYoV8v5Lp5k+/fRTREZGYtWqVejevTvCw8Nx1113oaSkRH6P0+nE1KlTERkZiejoaLz66quYOHEi7rvvvqv+vJs2bcItt9wCg8GAxMRETJ06FdXV1V61v/nmmxg/fjzCw8MRHx+PDz74wOscRUVFuPfeexEeHo6IiAg89NBDOHXqlNcxK1aswKBBgxASEoKYmBiMGzfOa39NTQ2efPJJGI1GdOzYEfPnz79q3UT0yxhmiNqo9957D0OHDsVTTz2FkpISlJSUIDExUd4/ffp0ZGdnY+/evejTpw+qqqowevRorFmzBjt27MCdd96JsWPHoqio6Krf54033sBDDz2EXbt2YfTo0ZgwYQLOnj17xeNramrwl7/8BX//+9+xYcMGFBUV4eWXX5b3v/322/jHP/6BBQsW4IcffoDNZsPy5cuvWsPu3btx5513Yty4cdi1axeWLFmC77//Hs8995zXcX/+85/Rp08fbN++HTNmzMCLL76InJwcAA0jVPfddx/Onj2L3Nxc5OTk4PDhw3j44Yfl93/zzTcYN24cxowZgx07dmDt2rUYNGiQ1/d45513MGjQIOzYsQNTpkzBM888g3379l21fiL6BX559jYRtUgjRowQL7zwgte2devWCQBi+fLlv/j+Hj16iA8++ED+OikpSbz77rvy1wDEa6+9Jn9dVVUlJEkS3377rdf3qqioEEIIsWDBAgFAHDp0SH7Phx9+KMxms/y12WwWf/7zn+WvnU6n6Nixo7j33nuvWOfjjz8ufv3rX3tt27hxo1CpVKK2tlau/a677vI65uGHHxajRo0SQgixevVqoVarRVFRkby/oKBAABBbt24VQggxdOhQMWHChCvWkZSUJB577DH5a7fbLWJjY8W8efOu+B4i+mUcmSGiy7p0RKG6uhrTp09Hjx49EBkZifDwcOzbt+8XR2b69Okj/zosLAxGoxFlZWVXPD40NBSdO3eWv46Li5OPt1qtOHXqFG688UZ5v1qtxsCBA69aQ35+Pj799FOEh4fLrzvvvBNutxuFhYXycUOHDvV639ChQ7F3714AwN69e5GYmOg1euW5Fp5jdu7cidtvv/2qtVx8PSRJgsViuer1IKJfxgZgIrqsS+9KeuWVV7Bq1Sr85S9/QZcuXWAwGPDAAw/A4XBc9Txardbra0mS4Ha7r+t4IUSjbRe7dP+l3G43nn76aUydOrXRvo4dO171vZ7vJYRo9H0v3W4wGK56LuD6rwcR/TKOzBC1YTqdDi6X65qO3bhxIyZNmoT7778fvXv3hsViwdGjRwNb4CVMJhPMZjO2bt0qb3O5XNixY8dV3zdgwAAUFBSgS5cujV46nU4+bvPmzV7v27x5M7p16wagYRSmqKgIxcXF8v49e/bAarWie/fuABpGXdauXevzz0lE14cjM0RtWHJyMrZs2YKjR48iPDwc7dq1u+KxXbp0wbJlyzB27FhIkoTf//73iowoPP/888jOzkaXLl3QrVs3fPDBB6ioqLjsqInHq6++iptuugnPPvssnnrqKYSFhWHv3r3IycnxumPphx9+wKxZs3DfffchJycHS5cuxTfffAMAuOOOO9CnTx9MmDABc+bMgdPpxJQpUzBixAh5Su7111/H7bffjs6dO+ORRx6B0+nEt99+i+nTpwf2ohC1cRyZIWrDXn75ZajVavTo0QPt27e/av/Lu+++i6ioKKSlpWHs2LG48847MWDAgCBW2+DVV1/Fo48+iieeeAJDhw6V+19CQkKu+J4+ffogNzcXBw8exM0334z+/fvj97//PeLi4ryO+81vfoP8/Hz0798fb775Jt555x3ceeedABqmg5YvX46oqCjccsstuOOOO9CpUycsWbJEfn96ejqWLl2KFStWoF+/frjtttuwZcuWwFwIIpJJ4pcmm4mImjG3243u3bvjoYcewptvvtnk8yQnJ2PatGmYNm2a/4ojoqDgNBMRtSjHjh3D6tWrMWLECNjtdsydOxeFhYUYP3680qURkUI4zURELYpKpcKnn36KwYMHY9iwYdi9ezfWrFkjN+ESUdvDaSYiIiJq0TgyQ0RERC0awwwRERG1aAwzRERE1KIxzBAREVGLxjBDRERELRrDDBEREbVoDDNERETUojHMEBERUYv2/wPqfqgbnw0V1gAAAABJRU5ErkJggg==",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# 为对比学习负采样准备词频率分布\n",
    "vocab_size = len(dataset.token2id)\n",
    "embed_size = 128\n",
    "distribution = dataset.get_word_distribution()\n",
    "print(distribution)\n",
    "model = SkipGramNCE(vocab_size, embed_size, distribution)\n",
    "\n",
    "from torch.utils.data import DataLoader\n",
    "from torch.optim import SGD, Adam\n",
    "\n",
    "# 定义静态方法collate_batch批量处理数据，转化为PyTorch可以需要的张量类型\n",
    "class DataCollator:\n",
    "    @classmethod\n",
    "    def collate_batch(cls, batch):\n",
    "        batch = np.array(batch)\n",
    "        input_ids = torch.tensor(batch[:, 0], dtype=torch.long)\n",
    "        labels = torch.tensor(batch[:, 1], dtype=torch.long)\n",
    "        return {'input_ids': input_ids, 'labels': labels}\n",
    "\n",
    "# 定义训练参数以及训练循环\n",
    "epochs = 100\n",
    "batch_size = 128\n",
    "learning_rate = 1e-3\n",
    "epoch_loss = []\n",
    "\n",
    "data_collator = DataCollator()\n",
    "dataloader = DataLoader(data, batch_size=batch_size, shuffle=True,\\\n",
    "    collate_fn=data_collator.collate_batch)\n",
    "optimizer = Adam(model.parameters(), lr=learning_rate)\n",
    "model.zero_grad()\n",
    "model.train()\n",
    "\n",
    "# 需要提前安装tqdm\n",
    "from tqdm import trange\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# 训练过程，每步读取数据，送入模型计算损失，并使用PyTorch进行优化\n",
    "with trange(epochs, desc='epoch', ncols=60) as pbar:\n",
    "    for epoch in pbar:\n",
    "        for step, batch in enumerate(dataloader):\n",
    "            loss = model(**batch)\n",
    "            pbar.set_description(f'epoch-{epoch}, loss={loss.item():.4f}')\n",
    "            loss.backward()\n",
    "            optimizer.step()\n",
    "            model.zero_grad()\n",
    "        epoch_loss.append(loss.item())\n",
    "    \n",
    "epoch_loss = np.array(epoch_loss)\n",
    "plt.plot(range(len(epoch_loss)), epoch_loss)\n",
    "plt.xlabel('training epoch')\n",
    "plt.ylabel('loss')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c9430e9a",
   "metadata": {},
   "source": [
    "TF-IDF加权\n",
    "\n",
    "定义词频率（term frequency）。注意到不同长度的文章词频率会有较大差距，不利于比较和运算，因此可以对词频率取对数。\n",
    "\n",
    "$$\\text{tf}_{t,d} = \\log (\\text{count}(t,d) + 1)$$\n",
    "\n",
    "其中$\\text{count}(t,d)$表示词$t$在文档$d$中出现的次数，为了避免对0取对数，把所有的计数加1。\n",
    "\n",
    "那么如何区分高频词与低频词呢？TF-IDF引入了另一个重要的评价指标——文档频率（document frequency），即一个词在语料库所包含的多少篇文档中出现。在所有文档里出现的词往往是虚词或是常见实词，而只在少量文档里出现的词往往是具有明确含义的实词并且具有很强的文档区分度。用$\\text{df}_t$来表示在多少篇文档中出现了词$t$。\n",
    "\n",
    "为了压低高频词和提升低频词的影响，TF-IDF使用文档频率的倒数，也就是逆向文档频率（inverse document frequency）来对词频率进行加权。这很好理解，一个词的文档频率越高，其倒数就越小，权重就越小。\n",
    "\n",
    "$$\\text{idf}_t = \\log \\frac{N}{\\text{df}_t}$$\n",
    "\n",
    "其中$N$表示文档总数。为了避免分母为0，通常会将分母改为$\\text{df}_t+1$。\n",
    "\n",
    "基于词频率和逆向文档频率，得到TF-IDF的最终值为：\n",
    "\n",
    "$$w_{t,d} = \\text{tf}_{t,d} \\times \\text{idf}_{t}$$\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f765e353",
   "metadata": {},
   "source": [
    "很多情况下会额外对文档的TF-IDF向量使用L2归一化，使得不同文档的TF-IDF向量具有相同的模长，便于相互比较。\n",
    "下面给出了TF-IDF的代码实现。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "9ce8e610",
   "metadata": {},
   "outputs": [],
   "source": [
    "class TFIDF:\n",
    "    def __init__(self, vocab_size, norm='l2', smooth_idf=True,\\\n",
    "                 sublinear_tf=True):\n",
    "        self.vocab_size = vocab_size\n",
    "        self.norm = norm\n",
    "        self.smooth_idf = smooth_idf\n",
    "        self.sublinear_tf = sublinear_tf\n",
    "    \n",
    "    def fit(self, X):\n",
    "        doc_freq = np.zeros(self.vocab_size, dtype=np.float64)\n",
    "        for data in X:\n",
    "            for token_id in set(data):\n",
    "                doc_freq[token_id] += 1\n",
    "        doc_freq += int(self.smooth_idf)\n",
    "        n_samples = len(X) + int(self.smooth_idf)\n",
    "        self.idf = np.log(n_samples / doc_freq) + 1\n",
    "    \n",
    "    def transform(self, X):\n",
    "        assert hasattr(self, 'idf')\n",
    "        term_freq = np.zeros((len(X), self.vocab_size), dtype=np.float64)\n",
    "        for i, data in enumerate(X):\n",
    "            for token in data:\n",
    "                term_freq[i, token] += 1\n",
    "        if self.sublinear_tf:\n",
    "            term_freq = np.log(term_freq + 1)\n",
    "        Y = term_freq * self.idf\n",
    "        if self.norm:\n",
    "            row_norm = (Y**2).sum(axis=1)\n",
    "            row_norm[row_norm == 0] = 1\n",
    "            Y /= np.sqrt(row_norm)[:, None]\n",
    "        return Y\n",
    "    \n",
    "    def fit_transform(self, X):\n",
    "        self.fit(X)\n",
    "        return self.transform(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "475d4b8d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting torchNote: you may need to restart the kernel to use updated packages.\n",
      "\n",
      "  Using cached torch-2.6.0-cp311-cp311-win_amd64.whl.metadata (28 kB)\n",
      "Requirement already satisfied: filelock in c:\\users\\doggie\\anaconda3\\lib\\site-packages (from torch) (3.9.0)\n",
      "Requirement already satisfied: typing-extensions>=4.10.0 in c:\\users\\doggie\\anaconda3\\lib\\site-packages (from torch) (4.12.2)\n",
      "Requirement already satisfied: networkx in c:\\users\\doggie\\anaconda3\\lib\\site-packages (from torch) (3.1)\n",
      "Requirement already satisfied: jinja2 in c:\\users\\doggie\\anaconda3\\lib\\site-packages (from torch) (3.1.2)\n",
      "Requirement already satisfied: fsspec in c:\\users\\doggie\\anaconda3\\lib\\site-packages (from torch) (2023.3.0)\n",
      "Requirement already satisfied: sympy==1.13.1 in c:\\users\\doggie\\anaconda3\\lib\\site-packages (from torch) (1.13.1)\n",
      "Requirement already satisfied: mpmath<1.4,>=1.1.0 in c:\\users\\doggie\\anaconda3\\lib\\site-packages (from sympy==1.13.1->torch) (1.3.0)\n",
      "Requirement already satisfied: MarkupSafe>=2.0 in c:\\users\\doggie\\anaconda3\\lib\\site-packages (from jinja2->torch) (2.1.1)\n",
      "Using cached torch-2.6.0-cp311-cp311-win_amd64.whl (204.2 MB)\n",
      "Installing collected packages: torch\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING: Ignoring invalid distribution ~ (C:\\Users\\Doggie\\anaconda3\\Lib\\site-packages)\n",
      "WARNING: Ignoring invalid distribution ~ (C:\\Users\\Doggie\\anaconda3\\Lib\\site-packages)\n",
      "ERROR: Could not install packages due to an OSError: [WinError 5] 拒绝访问。: 'C:\\\\Users\\\\Doggie\\\\anaconda3\\\\Lib\\\\site-packages\\\\torch\\\\lib\\\\uv.dll'\n",
      "Consider using the `--user` option or check the permissions.\n",
      "\n",
      "\n",
      "[notice] A new release of pip is available: 24.3.1 -> 25.0.1\n",
      "[notice] To update, run: python.exe -m pip install --upgrade pip\n"
     ]
    }
   ],
   "source": [
    "pip install torch --upgrade\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "b5394caa",
   "metadata": {},
   "outputs": [
    {
     "ename": "SyntaxError",
     "evalue": "invalid syntax (609037931.py, line 1)",
     "output_type": "error",
     "traceback": [
      "\u001b[1;36m  Cell \u001b[1;32mIn[11], line 1\u001b[1;36m\u001b[0m\n\u001b[1;33m    python -c \"import torch; print(torch.__version__)\"\u001b[0m\n\u001b[1;37m              ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m invalid syntax\n"
     ]
    }
   ],
   "source": [
    "python -c \"import torch; print(torch.__version__)\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "2fcefcc4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Looking in indexes: https://download.pytorch.org/whl/cpu\n",
      "Requirement already satisfied: torch in c:\\users\\doggie\\anaconda3\\lib\\site-packages (2.6.0)\n",
      "Collecting torchvision\n",
      "  Downloading https://download.pytorch.org/whl/cpu/torchvision-0.21.0%2Bcpu-cp311-cp311-win_amd64.whl.metadata (6.3 kB)\n",
      "Requirement already satisfied: filelock in c:\\users\\doggie\\anaconda3\\lib\\site-packages (from torch) (3.9.0)\n",
      "Requirement already satisfied: typing-extensions>=4.10.0 in c:\\users\\doggie\\anaconda3\\lib\\site-packages (from torch) (4.12.2)\n",
      "Requirement already satisfied: networkx in c:\\users\\doggie\\anaconda3\\lib\\site-packages (from torch) (3.1)\n",
      "Requirement already satisfied: jinja2 in c:\\users\\doggie\\anaconda3\\lib\\site-packages (from torch) (3.1.2)\n",
      "Requirement already satisfied: fsspec in c:\\users\\doggie\\anaconda3\\lib\\site-packages (from torch) (2023.3.0)\n",
      "Requirement already satisfied: sympy==1.13.1 in c:\\users\\doggie\\anaconda3\\lib\\site-packages (from torch) (1.13.1)\n",
      "Requirement already satisfied: mpmath<1.4,>=1.1.0 in c:\\users\\doggie\\anaconda3\\lib\\site-packages (from sympy==1.13.1->torch) (1.3.0)\n",
      "Requirement already satisfied: numpy in c:\\users\\doggie\\anaconda3\\lib\\site-packages (from torchvision) (1.24.3)\n",
      "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in c:\\users\\doggie\\anaconda3\\lib\\site-packages (from torchvision) (9.4.0)\n",
      "Requirement already satisfied: MarkupSafe>=2.0 in c:\\users\\doggie\\anaconda3\\lib\\site-packages (from jinja2->torch) (2.1.1)\n",
      "Downloading https://download.pytorch.org/whl/cpu/torchvision-0.21.0%2Bcpu-cp311-cp311-win_amd64.whl (1.6 MB)\n",
      "   ---------------------------------------- 0.0/1.6 MB ? eta -:--:--\n",
      "   -------------------------- ------------- 1.0/1.6 MB 7.2 MB/s eta 0:00:01\n",
      "   ---------------------------------------- 1.6/1.6 MB 8.3 MB/s eta 0:00:00\n",
      "Installing collected packages: torchvision\n",
      "Successfully installed torchvision-0.21.0+cpu\n",
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING: Ignoring invalid distribution ~ (C:\\Users\\Doggie\\anaconda3\\Lib\\site-packages)\n",
      "WARNING: Ignoring invalid distribution ~ (C:\\Users\\Doggie\\anaconda3\\Lib\\site-packages)\n",
      "WARNING: Ignoring invalid distribution ~ (C:\\Users\\Doggie\\anaconda3\\Lib\\site-packages)\n",
      "\n",
      "[notice] A new release of pip is available: 24.3.1 -> 25.0.1\n",
      "[notice] To update, run: python.exe -m pip install --upgrade pip\n"
     ]
    }
   ],
   "source": [
    "pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "8d6fbbf6",
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b38e8bd1",
   "metadata": {},
   "outputs": [],
   "source": [
    "pip uninstall torch torchvision torchaudio  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f0f796a5",
   "metadata": {},
   "outputs": [],
   "source": [
    "pip install torch torchvision torchaudio  "
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
